library(modplots)
Registered S3 method overwritten by 'data.table':
method from
print.data.table
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
── Attaching packages ─────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.3.3 ✔ purrr 0.3.4
✔ tibble 3.1.8 ✔ dplyr 1.0.10
✔ tidyr 1.1.3 ✔ stringr 1.4.0
✔ readr 1.4.0 ✔ forcats 0.5.1
── Conflicts ────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
library(Seurat)
Attaching SeuratObject
library(plotly)
Attaching package: ‘plotly’
The following object is masked from ‘package:ggplot2’:
last_plot
The following object is masked from ‘package:stats’:
filter
The following object is masked from ‘package:graphics’:
layout
# Mitochondrial genes
mt <- c("ENSGALG00000035334", #COX3
"ENSGALG00000032456", #COII
"ENSGALG00000032142", #MT-CO1
"ENSGALG00000032079", #MT-CYB
"ENSGALG00000037838", #ND6
"ENSGALG00000029500", #ND5
"ENSGALG00000036229", #MT-ND4
"ENSGALG00000042478", #ND4L
"ENSGALG00000030436", #ND3
"ENSGALG00000041091", #MT-ATP6
"ENSGALG00000032465", #MT-ATP8
"ENSGALG00000043768", #MT-ND2
"ENSGALG00000042750") #MT-ND1
# volcanoplot thresholds
p.adj <- 0.05
l2fc <- 0.5
# seurat object
my.se <- readRDS("~/spinal_cord_paper/data/Gg_ctrl_lumb_int_seurat_250723.rds")
# cluster labels from B10int and L10int
ctrl_lumb_int_combined_labels <- readRDS("~/spinal_cord_paper/annotations/ctrl_lumb_int_combined_labels.rds")
identical(colnames(my.se), rownames(ctrl_lumb_int_combined_labels))
[1] TRUE
my.se$annot_sample <- ctrl_lumb_int_combined_labels$annot_sample
my.se@active.assay <- "RNA"
We do DE analysis per cluster, contrasting B10 and L10 samples:
markers <- list()
numbers <- list()
composition <-list()
for (i in seq(levels(Idents(my.se)))) {
# subset for individual clusters
mn.se <- subset(x = my.se, idents = levels(Idents(my.se))[i])
mn.se$sample <- str_extract(mn.se$orig.ident, "ctrl|lumb")
composition[[i]] <- mn.se[[]] %>%
select(sample, annot_sample)
Idents(mn.se) <- "sample"
tmp_markers <- FindMarkers(mn.se,
ident.1 = "ctrl",
only.pos = FALSE,
min.pct = 0.25,
logfc.threshold = 0.2,
latent.vars = c("CC.Difference.seurat"),
test.use = "MAST",
assay = "RNA")
# cell numbers per sample
numbers[[i]] <- data.frame(table(mn.se$sample))
tmp_markers <- tmp_markers %>%
rownames_to_column("Gene.stable.ID") %>%
left_join(gnames)
markers[[i]] <- tmp_markers
}
names(markers) <- paste0("cl-", levels(Idents(my.se)))
names(numbers) <- paste0("cl-", levels(Idents(my.se)))
names(composition) <- paste0("cl-", levels(Idents(my.se)))
# bind lists into data frames
lumb_markers <- bind_rows(markers, .id = "cluster") %>%
mutate(cluster = factor(cluster, levels = paste0("cl-", levels(Idents(my.se)))))
lumb_numbers <- bind_rows(numbers, .id = "cluster") %>%
mutate(cluster = factor(cluster, levels = paste0("cl-", levels(Idents(my.se)))))
lumb_composition <- bind_rows(composition, .id = "cluster") %>%
mutate(cluster = factor(cluster, levels = paste0("cl-", levels(Idents(my.se)))))
saveRDS(lumb_markers, "~/spinal_cord_paper/data/Gg_ctrl_lumb_int_markers.rds")
saveRDS(lumb_numbers, "~/spinal_cord_paper/data/Gg_ctrl_lumb_int_numbers.rds")
saveRDS(lumb_composition, "~/spinal_cord_paper/data/Gg_ctrl_lumb_int_composition.rds")
lumb_markers <- readRDS("~/spinal_cord_paper/data/Gg_ctrl_lumb_int_markers.rds") %>%
mutate(clust_id = str_remove(cluster, "^cl-"))
Error in readRDS("~/spinal_cord_paper/data/Gg_ctrl_lumb_int_markers.rds") %>% :
could not find function "%>%"
Plot the cluster compositions and the number of marker genes.
DimPlot(my.se, label = TRUE, reduction = "tsne")
ggplot(data = lumb_numbers, aes(x = Var1, y = Freq, group = cluster, label = Freq)) +
geom_col() +
facet_wrap("cluster", nrow = 3) +
geom_text(nudge_y = 50, size = 3)
ggplot(data = lumb_markers %>% filter(p_val_adj < 0.05 & avg_log2FC > 0.5), aes(x = cluster, group = cluster)) +
geom_bar() +
facet_wrap("cluster", nrow = 3,scales = "free_x")
NA
NA
NA
gnames[grepl("TUBB3",gnames$Gene.name),]
DimPlot(my.se, label = TRUE, reduction = "tsne")
# TUBB3 expression
VlnPlot(my.se, features = c("ENSGALG00000000059"), pt.size = 0)
# vector of neuronal clusters
neuron_clusters <- c(3,7,17,20,27,28,30)
neuron_lumb_markers <- filter(lumb_markers, clust_id %in% neuron_clusters)
neuron_lumb_numbers <- filter(lumb_numbers, clust_id %in% neuron_clusters)
neuron_lumb_composition <- filter(lumb_compositions, clust_id %in% neuron_clusters)
Error in filter(lumb_compositions, clust_id %in% neuron_clusters) :
object 'lumb_compositions' not found
Barplots showing number of cells from B10 or L10, and the contributions from the individual B10int and L10int clusters:
toplot <- neuron_lumb_composition %>%
mutate(annot_sample = str_extract(annot_sample, "\\d{1,2}_.{1}")) %>%
mutate(annot_sample = factor(annot_sample)) %>%
group_by(sample, cluster) %>%
count(annot_sample) %>%
mutate(label_ypos = cumsum(n)- 0.5*n)
ggplot(data=toplot, aes(x=sample, y=n, fill=fct_rev(annot_sample))) +
geom_bar(stat="identity", color = "black") +
geom_text(aes(y=label_ypos, label=annot_sample),
color="black", size=3.5) +
facet_wrap("cluster", nrow = 1) +
theme(legend.position="none")
toplot <- neuron_lumb_markers %>%
mutate(gene_type = case_when(
avg_log2FC >= 0.5 & p_val_adj <= 0.05 ~ "ctrl",
avg_log2FC <= -0.5 & p_val_adj <= 0.05 ~ "lumb",
TRUE ~ "ns")
)
cols <- c(ctrl = "black",
lumb = "#419c73",
ns = "grey")
shapes <- c(ctrl = 21,
lumb = 21,
ns = 20)
volplot <- ggplot(data = toplot,
aes(x = avg_log2FC,
y = -log10(p_val_adj),
label = Gene.name,
color = gene_type,
shape = gene_type
)) +
geom_point() +
geom_hline(yintercept = -log10(p.adj), linetype = "dashed") +
geom_vline(xintercept = c(-l2fc,l2fc), linetype = "dashed") +
scale_color_manual(values = cols) +
scale_shape_manual(values = shapes) +
facet_wrap("cluster", nrow = 2, scales = "free") +
ylab("-log10(padj)") +
theme_bw()
ggplotly(volplot, width = 1000, height = 600)
NA
Plots without HOX and mitochondrial genes:
# filter out HOX and MT genes
toplot_nomt <- toplot %>%
filter(!grepl("^HOX", Gene.name)) %>%
filter(!Gene.stable.ID %in% mt)
volplot_nomt <- ggplot(data = toplot_nomt,
aes(x = avg_log2FC,
y = -log10(p_val_adj),
label = Gene.name,
color = gene_type,
shape = gene_type
)) +
geom_point() +
geom_hline(yintercept = -log10(p.adj), linetype = "dashed") +
geom_vline(xintercept = c(-l2fc,l2fc), linetype = "dashed") +
scale_color_manual(values = cols) +
scale_shape_manual(values = shapes) +
facet_wrap("cluster", nrow = 2, scales = "free") +
ylab("-log10(padj)") +
theme_bw()
ggplotly(volplot_nomt, width = 1000, height = 600)
NA
volplot_nomt +
ggrepel::geom_text_repel(aes(label = ifelse(gene_type == 'ns',
NA,
Gene.name)),
size = 3,
color = "black")
Warning: Removed 6096 rows containing missing values (geom_text_repel).
Warning: ggrepel: 48 unlabeled data points (too many overlaps). Consider increasing max.overlaps
pdf("~/spinal_cord_paper/figures/Fig_4_neuron_volcano.pdf", width = 10, height = 7)
volplot_nomt +
ggrepel::geom_text_repel(aes(label = ifelse(gene_type == 'ns',
NA,
Gene.name)),
size = 3,
color = "black")
Warning: Removed 6096 rows containing missing values (geom_text_repel).
Warning: ggrepel: 43 unlabeled data points (too many overlaps). Consider increasing max.overlaps
dev.off()
png
2
Cluster 30 (Consisting of 161 B10 vs 13 L10 cells) shows no MN related markers. Seemingly, those 13 cells are indeed motor neurons. This is supported by the fact of their expression of TUBB3, FOXP1, and SLC18A3.
gnames[grepl("^PLP1$|^MBP$|^FABP9$",gnames$Gene.name),]
DimPlot(my.se, label = TRUE, reduction = "tsne")
# MFOL marker expression
VlnPlot(my.se, features = c("ENSGALG00000000112","ENSGALG00000013640","ENSGALG00000032453"), pt.size = 0, ncol = 1)
# vector of neuronal clusters
MFOL_clusters <- c(16, 18, 34)
MFOL_lumb_markers <- filter(lumb_markers, clust_id %in% MFOL_clusters)
MFOL_lumb_numbers <- filter(lumb_numbers, clust_id %in% MFOL_clusters)
MFOL_lumb_composition <- filter(lumb_composition, clust_id %in% MFOL_clusters)
Barplots showing number of cells from B10 or L10, and the contributions from the individual B10int and L10int clusters:
ggplot(data = MFOL_lumb_numbers, aes(x = Var1, y = Freq, group = cluster, label = Freq)) +
geom_col() +
facet_wrap("cluster", nrow = 1) +
geom_text(nudge_y = 10)
toplot <- MFOL_lumb_composition %>%
mutate(annot_sample = str_extract(annot_sample, "\\d{1,2}_.{1}")) %>%
mutate(annot_sample = factor(annot_sample)) %>%
group_by(sample, cluster) %>%
count(annot_sample) %>%
mutate(label_ypos = cumsum(n)- 0.5*n)
ggplot(data=toplot, aes(x=sample, y=n, fill=fct_rev(annot_sample))) +
geom_bar(stat="identity", color = "black") +
geom_text(aes(y=label_ypos, label=annot_sample),
color="black", size=3.5) +
facet_wrap("cluster", nrow = 1) +
theme(legend.position="none")
toplot <- MFOL_lumb_markers %>%
mutate(gene_type = case_when(
avg_log2FC >= 0.5 & p_val_adj <= 0.05 ~ "ctrl",
avg_log2FC <= -0.5 & p_val_adj <= 0.05 ~ "lumb",
TRUE ~ "ns")
)
cols <- c(ctrl = "black",
lumb = "#419c73",
ns = "grey")
shapes <- c(ctrl = 21,
lumb = 21,
ns = 20)
volplot <- ggplot(data = toplot,
aes(x = avg_log2FC,
y = -log10(p_val_adj),
label = Gene.name,
color = gene_type,
shape = gene_type
)) +
geom_point() +
geom_hline(yintercept = -log10(p.adj), linetype = "dashed") +
geom_vline(xintercept = c(-l2fc,l2fc), linetype = "dashed") +
scale_color_manual(values = cols) +
scale_shape_manual(values = shapes) +
facet_wrap("cluster", nrow = 1, scales = "free") +
ylab("-log10(padj)") +
theme_bw()
ggplotly(volplot, width = 800, height = 500)
NA
Plots without HOX and mitochondrial genes:
# filter out HOX and MT genes
toplot_nomt <- toplot %>%
filter(!grepl("^HOX", Gene.name)) %>%
filter(!Gene.stable.ID %in% mt)
volplot_nomt <- ggplot(data = toplot_nomt,
aes(x = avg_log2FC,
y = -log10(p_val_adj),
label = Gene.name,
color = gene_type,
shape = gene_type
)) +
geom_point() +
geom_hline(yintercept = -log10(p.adj), linetype = "dashed") +
geom_vline(xintercept = c(-l2fc,l2fc), linetype = "dashed") +
scale_color_manual(values = cols) +
scale_shape_manual(values = shapes) +
facet_wrap("cluster", nrow = 1, scales = "free") +
ylab("-log10(padj)") +
theme_bw()
ggplotly(volplot_nomt, width = 800, height = 500)
NA
volplot_nomt +
ggrepel::geom_text_repel(aes(label = ifelse(gene_type == 'ns',
NA,
Gene.name)),
size = 3,
color = "black")
Warning: Removed 2944 rows containing missing values (geom_text_repel).
pdf("~/spinal_cord_paper/figures/Fig_4_MFOL_volcano.pdf", width = 10, height = 7)
volplot_nomt +
ggrepel::geom_text_repel(aes(label = ifelse(gene_type == 'ns',
NA,
Gene.name)),
size = 3,
color = "black")
Warning: Removed 2944 rows containing missing values (geom_text_repel).
dev.off()
png
2
# seurat object
my.se <- readRDS("~/spinal_cord_paper/data/Gg_ctrl_poly_int_seurat_250723.rds")
# cluster labels from B10int and L10int
ctrl_poly_int_combined_labels <- readRDS("~/spinal_cord_paper/annotations/ctrl_poly_int_combined_labels.rds")
identical(colnames(my.se), rownames(ctrl_poly_int_combined_labels))
[1] TRUE
my.se$annot_sample <- ctrl_poly_int_combined_labels$annot_sample
my.se@active.assay <- "RNA"
We do DE analysis per cluster, contrasting B10 and P10 samples:
markers <- list()
numbers <- list()
composition <- list()
for (i in seq(levels(Idents(my.se)))) {
# subset for individual clusters
mn.se <- subset(x = my.se, idents = levels(Idents(my.se))[i])
mn.se$sample <- str_extract(mn.se$orig.ident, "ctrl|poly")
composition[[i]] <- mn.se[[]] %>%
select(sample, annot_sample)
Idents(mn.se) <- "sample"
tmp_markers <- FindMarkers(mn.se,
ident.1 = "ctrl",
only.pos = FALSE,
min.pct = 0.25,
logfc.threshold = 0.2,
latent.vars = c("CC.Difference.seurat"),
test.use = "MAST",
assay = "RNA")
# cell numbers per sample
numbers[[i]] <- data.frame(table(mn.se$sample))
tmp_markers <- tmp_markers %>%
rownames_to_column("Gene.stable.ID") %>%
left_join(gnames)
markers[[i]] <- tmp_markers
}
names(markers) <- paste0("cl-", levels(Idents(my.se)))
names(numbers) <- paste0("cl-", levels(Idents(my.se)))
names(composition) <- paste0("cl-", levels(Idents(my.se)))
# bind lists into data frames
poly_markers <- bind_rows(markers, .id = "cluster") %>%
mutate(cluster = factor(cluster, levels = paste0("cl-", levels(Idents(my.se)))))
poly_numbers <- bind_rows(numbers, .id = "cluster") %>%
mutate(cluster = factor(cluster, levels = paste0("cl-", levels(Idents(my.se)))))
poly_composition <- bind_rows(composition, .id = "cluster") %>%
mutate(cluster = factor(cluster, levels = paste0("cl-", levels(Idents(my.se)))))
saveRDS(poly_markers, "~/spinal_cord_paper/data/Gg_ctrl_poly_int_markers.rds")
saveRDS(poly_numbers, "~/spinal_cord_paper/data/Gg_ctrl_poly_int_numbers.rds")
saveRDS(poly_composition, "~/spinal_cord_paper/data/Gg_ctrl_poly_int_composition.rds")
# load the DE data
poly_markers <- readRDS("~/test/poly_markers.rds") %>%
mutate(clust_id = str_remove(cluster, "^cl-"))
poly_numbers <- readRDS("~/test/poly_numbers.rds") %>%
mutate(clust_id = str_remove(cluster, "^cl-"))
poly_composition <- readRDS("~/test/poly_composition.rds") %>%
mutate(clust_id = str_remove(cluster, "^cl-"))
Plot the cluster compositions and the number of marker genes.
DimPlot(my.se, label = TRUE, reduction = "tsne")
ggplot(data = poly_numbers, aes(x = Var1, y = Freq, group = cluster, label = Freq)) +
geom_col() +
facet_wrap("cluster", nrow = 3) +
geom_text(nudge_y = 50, size = 3)
ggplot(data = poly_markers %>% filter(p_val_adj < 0.05 & avg_log2FC > 0.5), aes(x = cluster, group = cluster)) +
geom_bar() +
facet_wrap("cluster", nrow = 3,scales = "free_x")
gnames[grepl("TUBB3",gnames$Gene.name),]
# TUBB3 expression
VlnPlot(my.se, features = c("ENSGALG00000000059"), pt.size = 0)
# vector of neuronal clusters
neuron_clusters <- c(5,6,11,20,25,27,29,30,32)
neuron_poly_markers <- filter(poly_markers, clust_id %in% neuron_clusters)
neuron_poly_numbers <- filter(poly_numbers, clust_id %in% neuron_clusters)
neuron_poly_composition <- filter(poly_composition, clust_id %in% neuron_clusters)
Barplots showing number of cells from B10 or P10, and the contributions from the individual B10int and P10int clusters:
ggplot(data = neuron_poly_numbers, aes(x = Var1, y = Freq, group = cluster, label = Freq)) +
geom_col() +
facet_wrap("cluster", nrow = 1) +
geom_text(nudge_y = 10)
toplot <- neuron_poly_composition %>%
mutate(annot_sample = str_extract(annot_sample, "\\d{1,2}_.{1}")) %>%
mutate(annot_sample = factor(annot_sample)) %>%
group_by(sample, cluster) %>%
count(annot_sample) %>%
mutate(label_ypos = cumsum(n)- 0.5*n)
ggplot(data=toplot, aes(x=sample, y=n, fill=fct_rev(annot_sample))) +
geom_bar(stat="identity", color = "black") +
geom_text(aes(y=label_ypos, label=annot_sample),
color="black", size=3.5) +
facet_wrap("cluster", nrow = 1) +
theme(legend.position="none")
toplot <- neuron_poly_markers %>%
mutate(gene_type = case_when(
avg_log2FC >= 0.5 & p_val_adj <= 0.05 ~ "ctrl",
avg_log2FC <= -0.5 & p_val_adj <= 0.05 ~ "poly",
TRUE ~ "ns")
)
cols <- c(ctrl = "black",
poly = "goldenrod3",
ns = "grey")
shapes <- c(ctrl = 21,
poly = 21,
ns = 20)
volplot <- ggplot(data = toplot,
aes(x = avg_log2FC,
y = -log10(p_val_adj),
label = Gene.name,
color = gene_type,
shape = gene_type
)) +
geom_point() +
geom_hline(yintercept = -log10(p.adj), linetype = "dashed") +
geom_vline(xintercept = c(-l2fc,l2fc), linetype = "dashed") +
scale_color_manual(values = cols) +
scale_shape_manual(values = shapes) +
facet_wrap("cluster", nrow = 2, scales = "free") +
ylab("-log10(padj)") +
theme_bw()
ggplotly(volplot, width = 1000, height = 600)
Plots without mitochondrial genes:
toplot_nomt <- toplot %>%
filter(!Gene.stable.ID %in% mt)
volplot_nomt <- ggplot(data = toplot_nomt,
aes(x = avg_log2FC,
y = -log10(p_val_adj),
label = Gene.name,
color = gene_type,
shape = gene_type
)) +
geom_point() +
geom_hline(yintercept = -log10(p.adj), linetype = "dashed") +
geom_vline(xintercept = c(-l2fc,l2fc), linetype = "dashed") +
scale_color_manual(values = cols) +
scale_shape_manual(values = shapes) +
facet_wrap("cluster", nrow = 2, scales = "free") +
ylab("-log10(padj)") +
theme_bw()
ggplotly(volplot_nomt)
volplot_nomt +
ggrepel::geom_text_repel(aes(label = ifelse(gene_type == 'ns',
NA,
Gene.name)),
size = 3,
color = "black")
Warning: Removed 6087 rows containing missing values (geom_text_repel).
pdf("~/spinal_cord_paper/figures/Fig_5_neuron_volcano.pdf", width = 10, height = 7)
volplot_nomt +
ggrepel::geom_text_repel(aes(label = ifelse(gene_type == 'ns',
NA,
Gene.name)),
size = 3,
color = "black")
Warning: Removed 6087 rows containing missing values (geom_text_repel).
dev.off()
png
2
Single plot (no faceting due to low number of DE genes).
volplot_nomt_ind <- ggplot(data = toplot_nomt,
aes(x = avg_log2FC,
y = -log10(p_val_adj),
label = Gene.name,
color = gene_type,
shape = cluster,
)) +
geom_point() +
geom_hline(yintercept = -log10(p.adj), linetype = "dashed") +
geom_vline(xintercept = c(-l2fc,l2fc), linetype = "dashed") +
scale_color_manual(values = c("black", "grey", "goldenrod3"))+
scale_shape_manual(values = c(0, 16, 2, 3, 5, 16, 16, 16, 16)) +
xlim(c(-1,1)) +
ylab("-log10(padj)") +
theme_bw() +
ggrepel::geom_text_repel(aes(label = ifelse(gene_type == 'ns',
NA,
Gene.name)),
size = 3,
color = "black")
volplot_nomt_ind
Warning: Removed 10 rows containing missing values (geom_point).
Warning: Removed 6087 rows containing missing values (geom_text_repel).
gnames[grepl("^PLP1$|^MBP$|^FABP9$",gnames$Gene.name),]
# MFOL marker expression
VlnPlot(my.se, features = c("ENSGALG00000000112","ENSGALG00000013640","ENSGALG00000032453"), pt.size = 0, ncol = 1)
# vector of neuronal clusters
MFOL_clusters <- c(10, 24)
MFOL_poly_markers <- filter(poly_markers, clust_id %in% MFOL_clusters)
MFOL_poly_numbers <- filter(poly_numbers, clust_id %in% MFOL_clusters)
MFOL_poly_composition <- filter(poly_composition, clust_id %in% MFOL_clusters)
Barplots showing number of cells from B10 or L10, and the contributions from the individual B10int and L10int clusters:
ggplot(data = MFOL_poly_numbers, aes(x = Var1, y = Freq, group = cluster, label = Freq)) +
geom_col() +
facet_wrap("cluster", nrow = 1) +
geom_text(nudge_y = 10)
toplot <- MFOL_poly_composition %>%
mutate(annot_sample = str_extract(annot_sample, "\\d{1,2}_.{1}")) %>%
mutate(annot_sample = factor(annot_sample)) %>%
group_by(sample, cluster) %>%
count(annot_sample) %>%
mutate(label_ypos = cumsum(n)- 0.5*n)
ggplot(data=toplot, aes(x=sample, y=n, fill=fct_rev(annot_sample))) +
geom_bar(stat="identity", color = "black") +
geom_text(aes(y=label_ypos, label=annot_sample),
color="black", size=3.5) +
facet_wrap("cluster", nrow = 1) +
theme(legend.position="none")
Plots without HOX and mitochondrial genes:
# filter out HOX and MT genes
toplot_nomt <- toplot %>%
filter(!grepl("^HOX", Gene.name)) %>%
filter(!Gene.stable.ID %in% mt)
volplot_nomt <- ggplot(data = toplot_nomt,
aes(x = avg_log2FC,
y = -log10(p_val_adj),
label = Gene.name,
color = gene_type,
shape = gene_type
)) +
geom_point() +
geom_hline(yintercept = -log10(p.adj), linetype = "dashed") +
geom_vline(xintercept = c(-l2fc,l2fc), linetype = "dashed") +
scale_color_manual(values = cols) +
scale_shape_manual(values = shapes) +
facet_wrap("cluster", nrow = 1, scales = "free") +
ylab("-log10(padj)") +
theme_bw()
ggplotly(volplot_nomt, width = 800, height = 500)
NA
volplot_nomt +
ggrepel::geom_text_repel(aes(label = ifelse(gene_type == 'ns',
NA,
Gene.name)),
size = 3,
color = "black")
Warning: Removed 1 rows containing missing values (geom_point).
Warning: Removed 406 rows containing missing values (geom_text_repel).
pdf("~/spinal_cord_paper/figures/Fig_5_MFOL_volcano.pdf", width = 10, height = 7)
volplot_nomt +
ggrepel::geom_text_repel(aes(label = ifelse(gene_type == 'ns',
NA,
Gene.name)),
size = 3,
color = "black")
Warning: Removed 1 rows containing missing values (geom_point).
Warning: Removed 406 rows containing missing values (geom_text_repel).
dev.off()
png
2
gnames[grepl("^RSPO1$|^SHH$",gnames$Gene.name),]
# MFOL marker expression
VlnPlot(my.se, features = c("ENSGALG00000001946","ENSGALG00000006379"), pt.size = 0, ncol = 1)
# vector of neuronal clusters
RPFP_clusters <- c(22, 23)
RPFP_poly_markers <- filter(poly_markers, clust_id %in% RPFP_clusters)
RPFP_poly_numbers <- filter(poly_numbers, clust_id %in% RPFP_clusters)
RPFP_poly_composition <- filter(poly_composition, clust_id %in% RPFP_clusters)
Barplots showing number of cells from B10 or L10, and the contributions from the individual B10int and L10int clusters:
ggplot(data = RPFP_poly_numbers, aes(x = Var1, y = Freq, group = cluster, label = Freq)) +
geom_col() +
facet_wrap("cluster", nrow = 1) +
geom_text(nudge_y = 10)
toplot <- RPFP_poly_composition %>%
mutate(annot_sample = str_extract(annot_sample, "\\d{1,2}_.{1}")) %>%
mutate(annot_sample = factor(annot_sample)) %>%
group_by(sample, cluster) %>%
count(annot_sample) %>%
mutate(label_ypos = cumsum(n)- 0.5*n)
ggplot(data=toplot, aes(x=sample, y=n, fill=fct_rev(annot_sample))) +
geom_bar(stat="identity", color = "black") +
geom_text(aes(y=label_ypos, label=annot_sample),
color="black", size=3.5) +
facet_wrap("cluster", nrow = 1) +
theme(legend.position="none")
toplot <- RPFP_poly_markers %>%
mutate(gene_type = case_when(
avg_log2FC >= 0.5 & p_val_adj <= 0.05 ~ "ctrl",
avg_log2FC <= -0.5 & p_val_adj <= 0.05 ~ "poly",
TRUE ~ "ns")
)
cols <- c(ctrl = "black",
poly = "goldenrod3",
ns = "grey")
shapes <- c(ctrl = 21,
poly = 21,
ns = 20)
volplot <- ggplot(data = toplot,
aes(x = avg_log2FC,
y = -log10(p_val_adj),
label = Gene.name,
color = gene_type,
shape = gene_type
)) +
geom_point() +
geom_hline(yintercept = -log10(p.adj), linetype = "dashed") +
geom_vline(xintercept = c(-l2fc,l2fc), linetype = "dashed") +
scale_color_manual(values = cols) +
scale_shape_manual(values = shapes) +
facet_wrap("cluster", nrow = 1, scales = "free") +
ylab("-log10(padj)") +
theme_bw()
ggplotly(volplot, width = 800, height = 500)
NA
Plots without HOX and mitochondrial genes:
# filter out HOX and MT genes
toplot_nomt <- toplot %>%
filter(!grepl("^HOX", Gene.name)) %>%
filter(!Gene.stable.ID %in% mt)
volplot_nomt <- ggplot(data = toplot_nomt,
aes(x = avg_log2FC,
y = -log10(p_val_adj),
label = Gene.name,
color = gene_type,
shape = gene_type
)) +
geom_point() +
geom_hline(yintercept = -log10(p.adj), linetype = "dashed") +
geom_vline(xintercept = c(-l2fc,l2fc), linetype = "dashed") +
scale_color_manual(values = cols) +
scale_shape_manual(values = shapes) +
facet_wrap("cluster", nrow = 1, scales = "free") +
ylab("-log10(padj)") +
theme_bw()
ggplotly(volplot_nomt, width = 800, height = 500)
NA
volplot_nomt +
ggrepel::geom_text_repel(aes(label = ifelse(gene_type == 'ns',
NA,
Gene.name)),
size = 3,
color = "black")
pdf("~/spinal_cord_paper/figures/Fig_5_RPFP_volcano.pdf", width = 5, height = 3.5)
volplot_nomt +
NoLegend() +
ggrepel::geom_text_repel(aes(label = ifelse(gene_type == 'ns',
NA,
Gene.name)),
size = 3,
color = "black")
BP_RPFP_barplot
dev.off()
Clusters 16 and 18 show the highest numbers of DE genes. They are a progenitor population
# vector of neuronal clusters
NPC_clusters <- c(16, 18)
NPC_poly_markers <- filter(poly_markers, clust_id %in% NPC_clusters)
NPC_poly_numbers <- filter(poly_numbers, clust_id %in% NPC_clusters)
NPC_poly_composition <- filter(poly_composition, clust_id %in% NPC_clusters)
Barplots showing number of cells from B10 or L10, and the contributions from the individual B10int and L10int clusters:
ggplot(data = NPC_poly_numbers, aes(x = Var1, y = Freq, group = cluster, label = Freq)) +
geom_col() +
facet_wrap("cluster", nrow = 1) +
geom_text(nudge_y = 10)
toplot <- NPC_poly_composition %>%
mutate(annot_sample = str_extract(annot_sample, "\\d{1,2}_.{1}")) %>%
mutate(annot_sample = factor(annot_sample)) %>%
group_by(sample, cluster) %>%
count(annot_sample) %>%
mutate(label_ypos = cumsum(n)- 0.5*n)
ggplot(data=toplot, aes(x=sample, y=n, fill=fct_rev(annot_sample))) +
geom_bar(stat="identity", color = "black") +
geom_text(aes(y=label_ypos, label=annot_sample),
color="black", size=3.5) +
facet_wrap("cluster", nrow = 1) +
theme(legend.position="none")
toplot <- NPC_poly_markers %>%
mutate(gene_type = case_when(
avg_log2FC >= 0.5 & p_val_adj <= 0.05 ~ "ctrl",
avg_log2FC <= -0.5 & p_val_adj <= 0.05 ~ "poly",
TRUE ~ "ns")
)
cols <- c(ctrl = "black",
poly = "goldenrod3",
ns = "grey")
shapes <- c(ctrl = 21,
poly = 21,
ns = 20)
volplot <- ggplot(data = toplot,
aes(x = avg_log2FC,
y = -log10(p_val_adj),
label = Gene.name,
color = gene_type,
shape = gene_type
)) +
geom_point() +
geom_hline(yintercept = -log10(p.adj), linetype = "dashed") +
geom_vline(xintercept = c(-l2fc,l2fc), linetype = "dashed") +
scale_color_manual(values = cols) +
scale_shape_manual(values = shapes) +
facet_wrap("cluster", nrow = 1, scales = "free") +
ylab("-log10(padj)") +
theme_bw()
ggplotly(volplot, width = 800, height = 500)
NA
Plots without HOX and mitochondrial genes:
# filter out HOX and MT genes
toplot_nomt <- toplot %>%
filter(!grepl("^HOX", Gene.name)) %>%
filter(!Gene.stable.ID %in% mt)
volplot_nomt <- ggplot(data = toplot_nomt,
aes(x = avg_log2FC,
y = -log10(p_val_adj),
label = Gene.name,
color = gene_type,
shape = gene_type
)) +
geom_point() +
geom_hline(yintercept = -log10(p.adj), linetype = "dashed") +
geom_vline(xintercept = c(-l2fc,l2fc), linetype = "dashed") +
scale_color_manual(values = cols) +
scale_shape_manual(values = shapes) +
facet_wrap("cluster", nrow = 1, scales = "free") +
ylab("-log10(padj)") +
theme_bw()
ggplotly(volplot_nomt, width = 800, height = 500)
NA
volplot_nomt +
ggrepel::geom_text_repel(aes(label = ifelse(gene_type == 'ns',
NA,
Gene.name)),
size = 3,
color = "black")
Warning: Removed 1899 rows containing missing values (geom_text_repel).
Warning: ggrepel: 43 unlabeled data points (too many overlaps). Consider increasing max.overlaps
Warning: ggrepel: 135 unlabeled data points (too many overlaps). Consider increasing max.overlaps
pdf("~/spinal_cord_paper/figures/Fig_5_NPC_volcano.pdf", width = 10, height = 7)
volplot_nomt +
ggrepel::geom_text_repel(aes(label = ifelse(gene_type == 'ns',
NA,
Gene.name)),
size = 3,
color = "black")
Warning: Removed 1899 rows containing missing values (geom_text_repel).
Warning: ggrepel: 17 unlabeled data points (too many overlaps). Consider increasing max.overlaps
Warning: ggrepel: 124 unlabeled data points (too many overlaps). Consider increasing max.overlaps
dev.off()
png
2
# Date and time of Rendering
Sys.time()
sessionInfo()